# ================================================================================== #
# Complaints About Police Misconduct Have Adverse Effects for Black Civilians (PSRM)
# - Script: Appendix B (Robustness Checks)
# - Author: Patrick Kraft (patrickwilli.kraft@uc3m.es)
# ================================================================================== #


# Load raw data, packages, and custom functions ---------------------------

source(here::here("00-func.R"))
load(here("sqf_ccrb.Rdata"))


# Appendix B.1: Replication Using Alternative Matching of Complaints --------------

m2altmatch <- NULL

## variable selection for VAR
endo <- c("sqf_d","ccrb_altmatch_d","arrests_d")
exo <- c("media_d", "unemp_d","visit_overseas_d",
         "temp_mean","precip_total", "jan")

## all incidents
tmp <-  na.omit(left_join(total, controls)[, c(endo, exo)])
m2altmatch$total <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## white complainants / suspects
tmp <-  na.omit(left_join(white, controls)[, c(endo, exo)])
m2altmatch$white <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## black complainants / suspects
tmp <-  na.omit(left_join(black, controls)[, c(endo, exo)])
m2altmatch$black <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## hispanic complainants / suspects
tmp <-  na.omit(left_join(hispanic, controls)[, c(endo, exo)])
m2altmatch$hispanic <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## prepare estimates for plot
m2altmatch_coef <- map_dfr(m2altmatch, extractCoefs, .id = "id") %>%
  mutate(p = round(2* pt(abs(Estimate/SE), df=m2altmatch[[1]]$obs, lower.tail = F), 3),
         CCRB = factor(gsub(".*\\.l","Lag ", Variables), levels=paste0("Lag ",4:1)),
         Group = factor(id, levels = c("total", "white", "black", "hispanic"), 
                        labels = c("All CCRB Complaints / SQF Incidents","Whites (in CCRB & SQF)",
                                   "Blacks (in CCRB & SQF)","Latinos (in CCRB & SQF)")))

## coefficient plot
p2a <- ggplot(m2altmatch_coef, aes(y=Estimate, ymin=Estimate-1.96*SE, ymax=Estimate+1.96*SE, x=CCRB)) + 
  plot_default + geom_linerange() + geom_point(size=1) + coord_flip() + 
  facet_wrap(~Group, ncol=1, scales="free_y") +
  geom_text(label = paste0("p = ", m2altmatch_coef$p), nudge_x = .4, size = 2) +
  geom_hline(yintercept=0, linetype="dotted") + xlab("CCRB Complaints") +
  ggtitle("(A) Estimated Effect on SQF Incidents") + ylab("Coefficient")

## simulate series
m2altmatch_series <- rbind(
  varSim(m2altmatch[[1]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)), 
         shock = list(ccrb_altmatch_d = max(total[,"ccrb_altmatch_d"],na.rm = T)), 
         level = apply(total[,c("sqf","ccrb_altmatch","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "All CCRB Complaints / SQF Incidents", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group), 
  varSim(m2altmatch[[2]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_altmatch_d = max(white[,"ccrb_altmatch_d"],na.rm = T)),
         level = apply(white[,c("sqf","ccrb_altmatch","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Whites (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group),
  varSim(m2altmatch[[3]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_altmatch_d = max(black[,"ccrb_altmatch_d"],na.rm = T)),
         level = apply(black[,c("sqf","ccrb_altmatch","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Blacks (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group),
  varSim(m2altmatch[[4]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_altmatch_d = max(hispanic[,"ccrb_altmatch_d"],na.rm = T)),
         level = apply(hispanic[,c("sqf","ccrb_altmatch","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Latinos (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group)) %>%
  mutate(Group = factor(Group, levels = c("All CCRB Complaints / SQF Incidents",
                                          "Whites (in CCRB & SQF)",
                                          "Blacks (in CCRB & SQF)",
                                          "Latinos (in CCRB & SQF)")))

## compute confidence intervals for simulations
m2altmatch_ci <- m2altmatch_series %>% group_by(time, Group) %>%
  summarize(avg=mean(SQF), cilo=quantile(SQF,.025), cihi=quantile(SQF,.975), SQF=min(SQF))
m2altmatch_text <- m2altmatch_ci %>% 
  group_by(Group) %>% summarize(SQF=min(SQF)) %>%
  mutate(shock = c(max(total[,"ccrb_altmatch_d"],na.rm = T), max(white[,"ccrb_altmatch_d"],na.rm = T),
                   max(black[,"ccrb_altmatch_d"],na.rm = T), max(hispanic[,"ccrb_altmatch_d"],na.rm = T)),
         shock = paste0(" + ",round(shock, 1) * 100," complaints"),
         time = 0)

## plot simulations
p2b <- ggplot(m2altmatch_series, aes(x=as.factor(time), y=SQF)) + plot_default +
  geom_line(aes(group=iter), alpha=.01) + ylab("Number of Incidents (x 1000)") + 
  geom_vline(xintercept = 2) + facet_wrap(~Group,ncol=1,scales="free_y") + xlab("Month") + 
  ggtitle("(B) Predicted Monthly SQF Incidents") +
  geom_linerange(aes(ymin=cilo, ymax=cihi, x=as.factor(time)), data=m2altmatch_ci[m2altmatch_ci$time>0,]) +
  geom_point(aes(y=avg, x=as.factor(time)), size=1, data=m2altmatch_ci[m2altmatch_ci$time>0,]) +
  geom_segment(aes(x = 2, y = avg, xend = 6, yend = avg), lty="dotted", data=m2altmatch_ci[m2altmatch_ci$time==0,]) +
  geom_text(aes(label=shock), vjust=0, hjust=0, size=2, data=m2altmatch_text)

## save combined plot
ggsave("out/figB1_altmatch.png", 
       grid.arrange(p2a, p2b, ncol=2), 
       height=5.5, width=6.5)


# Appendix B.2: Replication Using Constant Lag Order of 3 -----------------

m2l3 <- NULL

## variable selection for VAR
endo <- c("sqf_d","ccrb_d","arrests_d")
exo <- c("media_d", "unemp_d","visit_overseas_d",
         "temp_mean","precip_total", "jan")

## all incidents
tmp <-  na.omit(left_join(total, controls)[, c(endo, exo)])
m2l3$total <- VAR(tmp[,endo], exogen = tmp[,exo], p=3)

## white complainants / suspects
tmp <-  na.omit(left_join(white, controls)[, c(endo, exo)])
m2l3$white <- VAR(tmp[,endo], exogen = tmp[,exo], p=3)

## black complainants / suspects
tmp <-  na.omit(left_join(black, controls)[, c(endo, exo)])
m2l3$black <- VAR(tmp[,endo], exogen = tmp[,exo], p=3)

## hispanic complainants / suspects
tmp <-  na.omit(left_join(hispanic, controls)[, c(endo, exo)])
m2l3$hispanic <- VAR(tmp[,endo], exogen = tmp[,exo], p=3)

## prepare estimates for plot
m2l3_coef <- map_dfr(m2l3, extractCoefs, .id = "id") %>%
  mutate(p = round(2* pt(abs(Estimate/SE), df=m2l3[[1]]$obs, lower.tail = F), 3),
         CCRB = factor(gsub(".*\\.l","Lag ", Variables), levels=paste0("Lag ",4:1)),
         Group = factor(id, levels = c("total", "white", "black", "hispanic"), 
                        labels = c("All CCRB Complaints / SQF Incidents","Whites (in CCRB & SQF)",
                                   "Blacks (in CCRB & SQF)","Latinos (in CCRB & SQF)")))

## coefficient plot
p2a <- ggplot(m2l3_coef, aes(y=Estimate, ymin=Estimate-1.96*SE, ymax=Estimate+1.96*SE, x=CCRB)) + 
  plot_default + geom_linerange() + geom_point(size=1) + coord_flip() + 
  facet_wrap(~Group, ncol=1, scales="free_y") +
  geom_text(label = paste0("p = ", m2l3_coef$p), nudge_x = .4, size = 2) +
  geom_hline(yintercept=0, linetype="dotted") + xlab("CCRB Complaints") +
  ggtitle("(A) Estimated Effect on SQF Incidents") + ylab("Coefficient")

## simulate series
m2l3_series <- rbind(
  varSim(m2l3[[1]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)), 
         shock = list(ccrb_d = max(total[,"ccrb_d"],na.rm = T)), 
         level = apply(total[,c("sqf","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "All CCRB Complaints / SQF Incidents", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group), 
  varSim(m2l3[[2]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(white[,"ccrb_d"],na.rm = T)),
         level = apply(white[,c("sqf","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Whites (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group),
  varSim(m2l3[[3]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(black[,"ccrb_d"],na.rm = T)),
         level = apply(black[,c("sqf","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Blacks (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group),
  varSim(m2l3[[4]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(hispanic[,"ccrb_d"],na.rm = T)),
         level = apply(hispanic[,c("sqf","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Latinos (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group)) %>%
  mutate(Group = factor(Group, levels = c("All CCRB Complaints / SQF Incidents",
                                          "Whites (in CCRB & SQF)",
                                          "Blacks (in CCRB & SQF)",
                                          "Latinos (in CCRB & SQF)")))

## compute confidence intervals for simulations
m2l3_ci <- m2l3_series %>% group_by(time, Group) %>%
  summarize(avg=mean(SQF), cilo=quantile(SQF,.025), cihi=quantile(SQF,.975), SQF=min(SQF))
m2l3_text <- m2l3_ci %>% 
  group_by(Group) %>% summarize(SQF=min(SQF)) %>%
  mutate(shock = c(max(total[,"ccrb_d"],na.rm = T), max(white[,"ccrb_d"],na.rm = T),
                   max(black[,"ccrb_d"],na.rm = T), max(hispanic[,"ccrb_d"],na.rm = T)),
         shock = paste0(" + ",round(shock, 1) * 100," complaints"),
         time = 0)

## plot simulations
p2b <- ggplot(m2l3_series, aes(x=as.factor(time), y=SQF)) + plot_default +
  geom_line(aes(group=iter), alpha=.01) + ylab("Number of Incidents (x 1000)") + 
  geom_vline(xintercept = 2) + facet_wrap(~Group,ncol=1,scales="free_y") + xlab("Month") + 
  ggtitle("(B) Predicted Monthly SQF Incidents") +
  geom_linerange(aes(ymin=cilo, ymax=cihi, x=as.factor(time)), data=m2l3_ci[m2l3_ci$time>0,]) +
  geom_point(aes(y=avg, x=as.factor(time)), size=1, data=m2l3_ci[m2l3_ci$time>0,]) +
  geom_segment(aes(x = 2, y = avg, xend = 6, yend = avg), lty="dotted", data=m2l3_ci[m2l3_ci$time==0,]) +
  geom_text(aes(label=shock), vjust=0, hjust=0, size=2, data=m2l3_text)

## save combined plot
ggsave("out/figB2_lag3.png", 
       grid.arrange(p2a, p2b, ncol=2), 
       height=5.5, width=6.5)


# Appendix B.3: Replication Using Constant Lag Order of 5 -----------------

m2l5 <- NULL

## variable selection for VAR
endo <- c("sqf_d","ccrb_d","arrests_d")
exo <- c("media_d", "unemp_d","visit_overseas_d",
         "temp_mean","precip_total", "jan")

## all incidents
tmp <-  na.omit(left_join(total, controls)[, c(endo, exo)])
m2l5$total <- VAR(tmp[,endo], exogen = tmp[,exo], p=5)

## white complainants / suspects
tmp <-  na.omit(left_join(white, controls)[, c(endo, exo)])
m2l5$white <- VAR(tmp[,endo], exogen = tmp[,exo], p=5)

## black complainants / suspects
tmp <-  na.omit(left_join(black, controls)[, c(endo, exo)])
m2l5$black <- VAR(tmp[,endo], exogen = tmp[,exo], p=5)

## hispanic complainants / suspects
tmp <-  na.omit(left_join(hispanic, controls)[, c(endo, exo)])
m2l5$hispanic <- VAR(tmp[,endo], exogen = tmp[,exo], p=5)

## prepare estimates for plot
m2l5_coef <- map_dfr(m2l5, extractCoefs, .id = "id") %>%
  mutate(p = round(2* pt(abs(Estimate/SE), df=m2l5[[1]]$obs, lower.tail = F), 3),
         CCRB = factor(gsub(".*\\.l","Lag ", Variables), levels=paste0("Lag ",5:1)),
         Group = factor(id, levels = c("total", "white", "black", "hispanic"), 
                        labels = c("All CCRB Complaints / SQF Incidents","Whites (in CCRB & SQF)",
                                   "Blacks (in CCRB & SQF)","Latinos (in CCRB & SQF)")))

## coefficient plot
p2a <- ggplot(m2l5_coef, aes(y=Estimate, ymin=Estimate-1.96*SE, ymax=Estimate+1.96*SE, x=CCRB)) + 
  plot_default + geom_linerange() + geom_point(size=1) + coord_flip() + 
  facet_wrap(~Group, ncol=1, scales="free_y") +
  geom_text(label = paste0("p = ", m2l5_coef$p), nudge_x = .4, size = 2) +
  geom_hline(yintercept=0, linetype="dotted") + xlab("CCRB Complaints") +
  ggtitle("(A) Estimated Effect on SQF Incidents") + ylab("Coefficient")

## simulate series
m2l5_series <- rbind(
  varSim(m2l5[[1]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)), 
         shock = list(ccrb_d = max(total[,"ccrb_d"],na.rm = T)), 
         level = apply(total[,c("sqf","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "All CCRB Complaints / SQF Incidents", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group), 
  varSim(m2l5[[2]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(white[,"ccrb_d"],na.rm = T)),
         level = apply(white[,c("sqf","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Whites (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group),
  varSim(m2l5[[3]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(black[,"ccrb_d"],na.rm = T)),
         level = apply(black[,c("sqf","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Blacks (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group),
  varSim(m2l5[[4]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(hispanic[,"ccrb_d"],na.rm = T)),
         level = apply(hispanic[,c("sqf","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Latinos (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group)) %>%
  mutate(Group = factor(Group, levels = c("All CCRB Complaints / SQF Incidents",
                                          "Whites (in CCRB & SQF)",
                                          "Blacks (in CCRB & SQF)",
                                          "Latinos (in CCRB & SQF)")))

## compute confidence intervals for simulations
m2l5_ci <- m2l5_series %>% group_by(time, Group) %>%
  summarize(avg=mean(SQF), cilo=quantile(SQF,.025), cihi=quantile(SQF,.975), SQF=min(SQF))
m2l5_text <- m2l5_ci %>% 
  group_by(Group) %>% summarize(SQF=min(SQF)) %>%
  mutate(shock = c(max(total[,"ccrb_d"],na.rm = T), max(white[,"ccrb_d"],na.rm = T),
                   max(black[,"ccrb_d"],na.rm = T), max(hispanic[,"ccrb_d"],na.rm = T)),
         shock = paste0(" + ",round(shock, 1) * 100," complaints"),
         time = 0)

## plot simulations
p2b <- ggplot(m2l5_series, aes(x=as.factor(time), y=SQF)) + plot_default +
  geom_line(aes(group=iter), alpha=.01) + ylab("Number of Incidents (x 1000)") + 
  geom_vline(xintercept = 2) + facet_wrap(~Group,ncol=1,scales="free_y") + xlab("Month") + 
  ggtitle("(B) Predicted Monthly SQF Incidents") +
  geom_linerange(aes(ymin=cilo, ymax=cihi, x=as.factor(time)), data=m2l5_ci[m2l5_ci$time>0,]) +
  geom_point(aes(y=avg, x=as.factor(time)), size=1, data=m2l5_ci[m2l5_ci$time>0,]) +
  geom_segment(aes(x = 2, y = avg, xend = 6, yend = avg), lty="dotted", data=m2l5_ci[m2l5_ci$time==0,]) +
  geom_text(aes(label=shock), vjust=0, hjust=0, size=2, data=m2l5_text)

## save combined plot
ggsave("out/figB3_lag5.png", 
       grid.arrange(p2a, p2b, ncol=2), 
       height=5.5, width=6.5)


# Appendix B.4: Replication Controlling for Crime (Reported Feloni --------

## Create multiple imputations
imp_total <- amelia_wrapper(left_join(total, controls))
imp_white <- amelia_wrapper(left_join(white, controls))
imp_black <- amelia_wrapper(left_join(black, controls))
imp_hispanic <- amelia_wrapper(left_join(hispanic, controls))

## Combine imputations in single list
imp <- vector("list", imp_total$m)
for(i in 1:imp_total$m){
  imp[[i]] <- imp_total$imputations[[i]] %>% 
    mutate(sqf_white_d = imp_white$imputations[[i]]$sqf_d,
           ccrb_white_d = imp_white$imputations[[i]]$ccrb_d,
           crime_white_d = imp_white$imputations[[i]]$crime_d,
           sqf_black_d = imp_black$imputations[[i]]$sqf_d,
           ccrb_black_d = imp_black$imputations[[i]]$ccrb_d,
           crime_black_d = imp_black$imputations[[i]]$crime_d,
           sqf_hispanic_d = imp_hispanic$imputations[[i]]$sqf_d,
           ccrb_hispanic_d = imp_hispanic$imputations[[i]]$ccrb_d,
           crime_hispanic_d = imp_hispanic$imputations[[i]]$crime_d,
           imputation = as.character(i))
}

## Check imputations
map_dfr(imp, bind_rows) %>%
  select(date, imputation, crime_d, 
         crime_white_d, crime_black_d, crime_hispanic_d) %>%
  gather(series, crime, -date, -imputation) %>%
  mutate(Group = factor(series, 
                        levels = c("crime_d", "crime_white_d", "crime_black_d", "crime_hispanic_d"), 
                        labels = c("Total","Whites","Blacks","Latinos"))) %>%
  ggplot(aes(x = date, y = crime, group = imputation)) +
  geom_line(alpha = .2) + facet_wrap(~Group, scales="free_y") + plot_default +
  ggtitle("Imputed Crime Data") + ylab("Change in Monthly Number of Reported Felonies (x 1000)") + xlab("Time") +
  theme(legend.position="bottom")

## Save plot
ggsave("out/figB4a_impoview.png", height=3.5, width=5.5)

## Variable selection for VAR
endo <- c("sqf_d","ccrb_d","arrests_d")
exo <- c("media_d", "unemp_d","visit_overseas_d",
         "temp_mean","precip_total", "jan")

## All incidents
m2crime <- NULL
m2crime[[1]] <- map(imp, ~VAR(.[,endo], exogen = .[,c("crime_d", exo)], p=4))

## By race
race <- c("white","black","hispanic")
for(i in race){
  m2crime[[i]] <- map(imp, ~VAR(.[, c(paste0(c("sqf_","ccrb_"),i,"_d"), "arrests_d")], 
                                exogen = .[,c(paste0("crime_",i,"_d"),exo)], p=4))
}

## prepare estimates for plot
m2crime_coef <- rbind(map_dfr(m2crime[[1]], ~prePlot(.$varresult, NeweyWest = F, report = "ccrb"), 
                              .id = "imputation")  %>% filter(Model=="sqf_d"),
                      map_dfr(m2crime[[2]], ~prePlot(.$varresult, NeweyWest = F, report = "ccrb"), 
                              .id = "imputation")  %>% filter(Model=="sqf_white_d"),
                      map_dfr(m2crime[[3]], ~prePlot(.$varresult, NeweyWest = F, report = "ccrb"), 
                              .id = "imputation")  %>% filter(Model=="sqf_black_d"),
                      map_dfr(m2crime[[4]], ~prePlot(.$varresult, NeweyWest = F, report = "ccrb"), 
                              .id = "imputation")  %>% filter(Model=="sqf_hispanic_d")) %>%
  group_by(Model, Variables) %>%
  summarize(m = length(unique(imputation)),
            Estimate_bar = mean(Estimate),
            SE_bar = sqrt(mean(SE^2) + sum((Estimate - mean(Estimate))^2)/(m-1) * (1 + 1/m))) %>%
  mutate(p = round(2* pnorm(abs(Estimate_bar/SE_bar), lower.tail = F), 3),
         CCRB = factor(gsub(".*\\.l","Lag ", Variables), levels=paste0("Lag ",5:1)),
         Group = factor(Model, levels = c("sqf_d", "sqf_white_d", "sqf_black_d", "sqf_hispanic_d"), 
                        labels = c("All CCRB Complaints / SQF Incidents","Whites (in CCRB & SQF)",
                                   "Blacks (in CCRB & SQF)","Latinos (in CCRB & SQF)")))

## save plot
ggplot(m2crime_coef, aes(y=Estimate_bar, ymin=Estimate_bar-1.96*SE_bar, 
                                ymax=Estimate_bar+1.96*SE_bar, x=CCRB)) + plot_default +
  geom_linerange() + geom_point(size=1) + coord_flip() + facet_wrap(~Group, ncol=2) +
  geom_text(label = paste0("p = ", m2crime_coef$p), nudge_x = .4, size = 2) +
  geom_hline(yintercept=0, linetype="dotted") + xlab("CCRB Complaints") +
  ggtitle("Estimated Effect on SQF Incidents") + ylab("Coefficient")
ggsave("out/figB4b_impcoefs.png", height=3.5, width=5.5)


# Appendix B.5: Replication with Endogenous Media Coverage ----------------

m2media <- NULL

## variable selection for VAR
endo <- c("sqf_d","ccrb_d","arrests_d","media_d")
exo <- c("unemp_d","visit_overseas_d",
         "temp_mean","precip_total", "jan")

## all incidents
tmp <-  na.omit(left_join(total, controls)[, c(endo, exo)])
m2media$total <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## white complainants / suspects
tmp <-  na.omit(left_join(white, controls)[, c(endo, exo)])
m2media$white <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## black complainants / suspects
tmp <-  na.omit(left_join(black, controls)[, c(endo, exo)])
m2media$black <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## hispanic complainants / suspects
tmp <-  na.omit(left_join(hispanic, controls)[, c(endo, exo)])
m2media$hispanic <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## prepare estimates for plot
m2media_coef <- map_dfr(m2media, extractCoefs, .id = "id") %>%
  mutate(p = round(2* pt(abs(Estimate/SE), df=m2media[[1]]$obs, lower.tail = F), 3),
         CCRB = factor(gsub(".*\\.l","Lag ", Variables), levels=paste0("Lag ",5:1)),
         Group = factor(id, levels = c("total", "white", "black", "hispanic"), 
                        labels = c("All CCRB Complaints / SQF Incidents","Whites (in CCRB & SQF)",
                                   "Blacks (in CCRB & SQF)","Latinos (in CCRB & SQF)")))

## coefficient plot
p2a <- ggplot(m2media_coef, aes(y=Estimate, ymin=Estimate-1.96*SE, ymax=Estimate+1.96*SE, x=CCRB)) + 
  plot_default + geom_linerange() + geom_point(size=1) + coord_flip() + 
  facet_wrap(~Group, ncol=1, scales="free_y") +
  geom_text(label = paste0("p = ", m2media_coef$p), nudge_x = .4, size = 2) +
  geom_hline(yintercept=0, linetype="dotted") + xlab("CCRB Complaints") +
  ggtitle("(A) Estimated Effect on SQF Incidents") + ylab("Coefficient")

## simulate series
m2media_series <- rbind(
  varSim(m2media[[1]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)), 
         shock = list(ccrb_d = max(total[,"ccrb_d"],na.rm = T)), 
         level = apply(left_join(total, controls)[,c("sqf","ccrb","arrests","media")],2,mean,na.rm=T),
         vars = 4) %>%
    mutate(Group = "All CCRB Complaints / SQF Incidents", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group), 
  varSim(m2media[[2]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(white[,"ccrb_d"],na.rm = T)),
         level = apply(left_join(white, controls)[,c("sqf","ccrb","arrests","media")],2,mean,na.rm=T),
         vars = 4) %>%
    mutate(Group = "Whites (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group),
  varSim(m2media[[3]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(black[,"ccrb_d"],na.rm = T)),
         level = apply(left_join(black, controls)[,c("sqf","ccrb","arrests","media")],2,mean,na.rm=T),
         vars = 4) %>%
    mutate(Group = "Blacks (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group),
  varSim(m2media[[4]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(hispanic[,"ccrb_d"],na.rm = T)),
         level = apply(left_join(hispanic, controls)[,c("sqf","ccrb","arrests","media")],2,mean,na.rm=T),
         vars = 4) %>%
    mutate(Group = "Latinos (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group)) %>%
  mutate(Group = factor(Group, levels = c("All CCRB Complaints / SQF Incidents",
                                          "Whites (in CCRB & SQF)",
                                          "Blacks (in CCRB & SQF)",
                                          "Latinos (in CCRB & SQF)")))

## compute confidence intervals for simulations
m2media_ci <- m2media_series %>% group_by(time, Group) %>%
  summarize(avg=mean(SQF), cilo=quantile(SQF,.025), cihi=quantile(SQF,.975), SQF=min(SQF))
m2media_text <- m2media_ci %>% 
  group_by(Group) %>% summarize(SQF=min(SQF)) %>%
  mutate(shock = c(max(total[,"ccrb_d"],na.rm = T), max(white[,"ccrb_d"],na.rm = T),
                   max(black[,"ccrb_d"],na.rm = T), max(hispanic[,"ccrb_d"],na.rm = T)),
         shock = paste0(" + ",round(shock, 1) * 100," complaints"),
         time = 0)

## plot simulations
p2b <- ggplot(m2media_series, aes(x=as.factor(time), y=SQF)) + plot_default +
  geom_line(aes(group=iter), alpha=.01) + ylab("Number of Incidents (x 1000)") + 
  geom_vline(xintercept = 2) + facet_wrap(~Group,ncol=1,scales="free_y") + xlab("Month") + 
  ggtitle("(B) Predicted Monthly SQF Incidents") +
  geom_linerange(aes(ymin=cilo, ymax=cihi, x=as.factor(time)), data=m2media_ci[m2media_ci$time>0,]) +
  geom_point(aes(y=avg, x=as.factor(time)), size=1, data=m2media_ci[m2media_ci$time>0,]) +
  geom_segment(aes(x = 2, y = avg, xend = 6, yend = avg), lty="dotted", data=m2media_ci[m2media_ci$time==0,]) +
  geom_text(aes(label=shock), vjust=0, hjust=0, size=2, data=m2media_text)

## save combined plot
ggsave("out/figB5_endomedia.png", 
       grid.arrange(p2a, p2b, ncol=2), 
       height=5.5, width=6.5)


# Appendix B.6: Results using Reduced Data (prior to 2013) ----------------

m2012 <- NULL

## variable selection for VAR
endo <- c("sqf_d","ccrb_d","arrests_d")
exo <- c("media_d", "unemp_d","visit_overseas_d",
         "temp_mean","precip_total", "jan")

## all incidents
tmp <-  na.omit(filter(left_join(total, controls), date < "2013-01-01")[, c(endo, exo)])
m2012$total <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## white complainants / suspects
tmp <-  na.omit(filter(left_join(white, controls), date < "2013-01-01")[, c(endo, exo)])
m2012$white <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## black complainants / suspects
tmp <-  na.omit(filter(left_join(black, controls), date < "2013-01-01")[, c(endo, exo)])
m2012$black <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## hispanic complainants / suspects
tmp <-  na.omit(filter(left_join(hispanic, controls), date < "2013-01-01")[, c(endo, exo)])
m2012$hispanic <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## prepare estimates for plot
m2012_coef <- map_dfr(m2012, extractCoefs, .id = "id") %>%
  mutate(p = round(2* pt(abs(Estimate/SE), df=m2012[[1]]$obs, lower.tail = F), 3),
         CCRB = factor(gsub(".*\\.l","Lag ", Variables), levels=paste0("Lag ",4:1)),
         Group = factor(id, levels = c("total", "white", "black", "hispanic"), 
                        labels = c("All CCRB Complaints / SQF Incidents","Whites (in CCRB & SQF)",
                                   "Blacks (in CCRB & SQF)","Latinos (in CCRB & SQF)")))

## coefficient plot
p2a <- ggplot(m2012_coef, aes(y=Estimate, ymin=Estimate-1.96*SE, ymax=Estimate+1.96*SE, x=CCRB)) + 
  plot_default + geom_linerange() + geom_point(size=1) + coord_flip() + 
  facet_wrap(~Group, ncol=1, scales="free_y") +
  geom_text(label = paste0("p = ", m2012_coef$p), nudge_x = .4, size = 2) +
  geom_hline(yintercept=0, linetype="dotted") + xlab("CCRB Complaints") +
  ggtitle("(A) Estimated Effect on SQF Incidents") + ylab("Coefficient")

## simulate series
m2012_series <- rbind(
  varSim(m2012[[1]], dumvar = apply(filter(controls, date < "2013-01-01")[,exo],2,function(x) rep(mean(x, na.rm=T), 6)), 
         shock = list(ccrb_d = max(filter(total, date < "2013-01-01")[,"ccrb_d"],na.rm = T)), 
         level = apply(filter(total, date < "2013-01-01")[,c("sqf","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "All CCRB Complaints / SQF Incidents", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group), 
  varSim(m2012[[2]], dumvar = apply(filter(controls, date < "2013-01-01")[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(filter(white, date < "2013-01-01")[,"ccrb_d"],na.rm = T)),
         level = apply(filter(white, date < "2013-01-01")[,c("sqf","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Whites (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group),
  varSim(m2012[[3]], dumvar = apply(filter(controls, date < "2013-01-01")[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(filter(black, date < "2013-01-01")[,"ccrb_d"],na.rm = T)),
         level = apply(filter(black, date < "2013-01-01")[,c("sqf","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Blacks (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group),
  varSim(m2012[[4]], dumvar = apply(filter(controls, date < "2013-01-01")[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(filter(hispanic, date < "2013-01-01")[,"ccrb_d"],na.rm = T)),
         level = apply(filter(hispanic, date < "2013-01-01")[,c("sqf","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Latinos (in CCRB & SQF)", SQF = sqf_d.1) %>%
    dplyr::select(SQF,time,iter,Group)) %>%
  mutate(Group = factor(Group, levels = c("All CCRB Complaints / SQF Incidents",
                                          "Whites (in CCRB & SQF)",
                                          "Blacks (in CCRB & SQF)",
                                          "Latinos (in CCRB & SQF)")))

## compute confidence intervals for simulations
m2012_ci <- m2012_series %>% group_by(time, Group) %>%
  summarize(avg=mean(SQF), cilo=quantile(SQF,.025), cihi=quantile(SQF,.975), SQF=min(SQF))
m2012_text <- m2012_ci %>% 
  group_by(Group) %>% summarize(SQF=min(SQF)) %>%
  mutate(shock = c(max(filter(total, date < "2013-01-01")[,"ccrb_d"],na.rm = T), 
                   max(filter(white, date < "2013-01-01")[,"ccrb_d"],na.rm = T),
                   max(filter(black, date < "2013-01-01")[,"ccrb_d"],na.rm = T), 
                   max(filter(hispanic, date < "2013-01-01")[,"ccrb_d"],na.rm = T)),
         shock = paste0(" + ",round(shock, 1) * 100," complaints"),
         time = 0)

## plot simulations
p2b <- ggplot(m2012_series, aes(x=as.factor(time), y=SQF)) + plot_default +
  geom_line(aes(group=iter), alpha=.01) + ylab("Number of Incidents (x 1000)") + 
  geom_vline(xintercept = 2) + facet_wrap(~Group,ncol=1,scales="free_y") + xlab("Month") + 
  ggtitle("(B) Predicted Monthly SQF Incidents") +
  geom_linerange(aes(ymin=cilo, ymax=cihi, x=as.factor(time)), data=m2012_ci[m2012_ci$time>0,]) +
  geom_point(aes(y=avg, x=as.factor(time)), size=1, data=m2012_ci[m2012_ci$time>0,]) +
  geom_segment(aes(x = 2, y = avg, xend = 6, yend = avg), lty="dotted", data=m2012_ci[m2012_ci$time==0,]) +
  geom_text(aes(label=shock), vjust=0, hjust=0, size=2, data=m2012_text)

## save combined plot
ggsave("out/figB6_2012.png", 
       grid.arrange(p2a, p2b, ncol=2), 
       height=5.5, width=6.5)


# Appendix B.7: Focusing on SQF Incidents Involving Use of Force ----------

m2force <- NULL

## variable selection for VAR
endo <- c("sqf_force_d","ccrb_d","arrests_d")
exo <- c("media_d", "unemp_d","visit_overseas_d",
         "temp_mean","precip_total", "jan")

## all incidents
tmp <-  na.omit(left_join(total, controls)[, c(endo, exo)])
m2force$total <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## white complainants / suspects
tmp <-  na.omit(left_join(white, controls)[, c(endo, exo)])
m2force$white <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## black complainants / suspects
tmp <-  na.omit(left_join(black, controls)[, c(endo, exo)])
m2force$black <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## hispanic complainants / suspects
tmp <-  na.omit(left_join(hispanic, controls)[, c(endo, exo)])
m2force$hispanic <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## prepare estimates for plot
m2force_coef <- map_dfr(m2force, extractCoefs, model = "sqf_force_d", .id = "id") %>%
  mutate(p = round(2* pt(abs(Estimate/SE), df=m2force[[1]]$obs, lower.tail = F), 3),
         CCRB = factor(gsub(".*\\.l","Lag ", Variables), levels=paste0("Lag ",5:1)),
         Group = factor(id, levels = c("total", "white", "black", "hispanic"), 
                        labels = c("All CCRB Complaints / SQF Incidents Involving Use of Force",
                                   "Whites (in CCRB & SQF /w Use of Force)",
                                   "Blacks (in CCRB & SQF /w Use of Force)",
                                   "Latinos (in CCRB & SQF /w Use of Force)")))

## coefficient plot
p2a <- ggplot(m2force_coef, aes(y=Estimate, ymin=Estimate-1.96*SE, ymax=Estimate+1.96*SE, x=CCRB)) + 
  plot_default + geom_linerange() + geom_point(size=1) + coord_flip() + 
  facet_wrap(~Group, ncol=1, scales="free_y") +
  geom_text(label = paste0("p = ", m2force_coef$p), nudge_x = .4, size = 2) +
  geom_hline(yintercept=0, linetype="dotted") + xlab("CCRB Complaints") +
  ggtitle("(A) Estimated Effect on SQF Incidents") + ylab("Coefficient")

## simulate series
m2force_series <- rbind(
  varSim(m2force[[1]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)), 
         shock = list(ccrb_d = max(total[,"ccrb_d"],na.rm = T)), 
         level = apply(total[,c("sqf_force","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "All CCRB Complaints / SQF Incidents Involving Use of Force", SQF = sqf_force_d.1) %>%
    dplyr::select(SQF,time,iter,Group), 
  varSim(m2force[[2]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(white[,"ccrb_d"],na.rm = T)),
         level = apply(white[,c("sqf_force","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Whites (in CCRB & SQF /w Use of Force)", SQF = sqf_force_d.1) %>%
    dplyr::select(SQF,time,iter,Group),
  varSim(m2force[[3]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(black[,"ccrb_d"],na.rm = T)),
         level = apply(black[,c("sqf_force","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Blacks (in CCRB & SQF /w Use of Force)", SQF = sqf_force_d.1) %>%
    dplyr::select(SQF,time,iter,Group),
  varSim(m2force[[4]], dumvar = apply(controls[,exo],2,function(x) rep(mean(x, na.rm=T), 6)),
         shock = list(ccrb_d = max(hispanic[,"ccrb_d"],na.rm = T)),
         level = apply(hispanic[,c("sqf_force","ccrb","arrests")],2,mean,na.rm=T)) %>%
    mutate(Group = "Latinos (in CCRB & SQF /w Use of Force)", SQF = sqf_force_d.1) %>%
    dplyr::select(SQF,time,iter,Group)) %>%
  mutate(Group = factor(Group, levels = c("All CCRB Complaints / SQF Incidents Involving Use of Force",
                                          "Whites (in CCRB & SQF /w Use of Force)",
                                          "Blacks (in CCRB & SQF /w Use of Force)",
                                          "Latinos (in CCRB & SQF /w Use of Force)")))

## compute confidence intervals for simulations
m2force_ci <- m2force_series %>% group_by(time, Group) %>%
  summarize(avg=mean(SQF), cilo=quantile(SQF,.025), cihi=quantile(SQF,.975), SQF=min(SQF))
m2force_text <- m2force_ci %>% 
  group_by(Group) %>% summarize(SQF=min(SQF)) %>%
  mutate(shock = c(max(total[,"ccrb_d"],na.rm = T), max(white[,"ccrb_d"],na.rm = T),
                   max(black[,"ccrb_d"],na.rm = T), max(hispanic[,"ccrb_d"],na.rm = T)),
         shock = paste0(" + ",round(shock, 1) * 100," complaints"),
         time = 0)

## plot simulations
p2b <- ggplot(m2force_series, aes(x=as.factor(time), y=SQF)) + plot_default +
  geom_line(aes(group=iter), alpha=.01) + ylab("Number of Incidents (x 1000)") + 
  geom_vline(xintercept = 2) + facet_wrap(~Group,ncol=1,scales="free_y") + xlab("Month") + 
  ggtitle("(B) Predicted Monthly SQF Incidents") +
  geom_linerange(aes(ymin=cilo, ymax=cihi, x=as.factor(time)), data=m2force_ci[m2force_ci$time>0,]) +
  geom_point(aes(y=avg, x=as.factor(time)), size=1, data=m2force_ci[m2force_ci$time>0,]) +
  geom_segment(aes(x = 2, y = avg, xend = 6, yend = avg), lty="dotted", data=m2force_ci[m2force_ci$time==0,]) +
  geom_text(aes(label=shock), vjust=0, hjust=0, size=2, data=m2force_text)

## save combined plot
ggsave("out/figB7_force.png", 
       grid.arrange(p2a, p2b, ncol=2), 
       height=5.5, width=6.5)


# Appendix B.8: Temporal Placebo Test (Using Leads Instead of Lags) -------

m2reverse <- NULL

## variable selection for VAR
endo <- c("sqf_d","ccrb_d","arrests_d")
exo <- c("media_d", "unemp_d","visit_overseas_d",
         "temp_mean","precip_total", "jan")

## all incidents
tmp <-  na.omit(arrange(left_join(total, controls), desc(date))[, c(endo, exo)])
m2reverse$total <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## white complainants / suspects
tmp <-  na.omit(arrange(left_join(white, controls), desc(date))[, c(endo, exo)])
m2reverse$white <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## black complainants / suspects
tmp <-  na.omit(arrange(left_join(black, controls), desc(date))[, c(endo, exo)])
m2reverse$black <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## hispanic complainants / suspects
tmp <-  na.omit(arrange(left_join(hispanic, controls), desc(date))[, c(endo, exo)])
m2reverse$hispanic <- VAR(tmp[,endo], exogen = tmp[,exo], lag.max=5, ic="AIC")

## prepare estimates for plot
m2reverse_coef <- map_dfr(m2reverse, extractCoefs, .id = "id") %>%
  mutate(p = round(2* pt(abs(Estimate/SE), df=m2reverse[[1]]$obs, lower.tail = F), 3),
         CCRB = factor(gsub(".*\\.l","Lag ", Variables), levels=paste0("Lag ",4:1)),
         Group = factor(id, levels = c("total", "white", "black", "hispanic"), 
                        labels = c("All CCRB Complaints / SQF Incidents","Whites (in CCRB & SQF)",
                                   "Blacks (in CCRB & SQF)","Latinos (in CCRB & SQF)")))

## coefficient plot
ggplot(m2reverse_coef, aes(y=Estimate, ymin=Estimate-1.96*SE, ymax=Estimate+1.96*SE, x=CCRB)) + 
  plot_default + geom_linerange() + geom_point(size=1) + coord_flip() + 
  facet_wrap(~Group, ncol=2, scales="free_y") +
  geom_text(label = paste0("p = ", m2reverse_coef$p), nudge_x = .4, size = 2) +
  geom_hline(yintercept=0, linetype="dotted") + xlab("CCRB Complaints") +
  ggtitle("Estimated Effect on SQF Incidents") + ylab("Coefficient")

## save plot
ggsave("out/figB8_reverse.png", height=3.5, width=5.5)

